Add a LU decomposition action in BTL and various cleaning in BTL. For instance

all per plot settings have been moved to a single file, go_mean now takes an
optional second argument "tiny" to generate plots for tiny matrices, and
output of comparison information wrt to previous benchs (if any).
This commit is contained in:
Gael Guennebaud 2008-08-04 23:12:48 +00:00
parent c2f8ecf466
commit a7a05382d1
40 changed files with 327 additions and 109 deletions

View File

@ -9,7 +9,8 @@ OPTION(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)
SET(CMAKE_INCLUDE_CURRENT_DIR ON)
IF(CMAKE_COMPILER_IS_GNUCXX)
string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER})
IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG")
SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
IF(NOT BTL_NOVEC)
@ -18,7 +19,12 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
ELSE(NOT BTL_NOVEC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
ENDIF(NOT BTL_NOVEC)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
if(IS_ICPC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast")
endif(IS_ICPC)
include_directories(
${PROJECT_SOURCE_DIR}/actions
@ -76,3 +82,4 @@ add_subdirectory(libs/STL_algo)
add_subdirectory(data)

View File

@ -102,7 +102,7 @@ public :
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(0);
exit(1);
}
}

View File

@ -85,7 +85,9 @@ public :
}
inline void calculate( void ) {
asm("#mybegin axpby");
Interface::axpby(_alpha,X,_beta,Y,_size);
asm("#myend axpby");
}
void check_result( void ){

View File

@ -96,7 +96,9 @@ public :
}
inline void calculate( void ) {
Interface::axpy(_coef,X,Y,_size);
asm("#mybegin axpy");
Interface::axpy(_coef,X,Y,_size);
asm("#myend axpy");
}
void check_result( void ){

View File

@ -0,0 +1,124 @@
//=====================================================
// File : action_lu_decomp.hh
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_LU_DECOMP
#define ACTION_LU_DECOMP
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_lu_decomp {
public :
// Ctor
Action_lu_decomp( int size ):_size(size)
{
MESSAGE("Action_lu_decomp Ctor");
// STL vector initialization
init_matrix<pseudo_random>(X_stl,_size);
init_matrix<null_function>(C_stl,_size);
init_matrix<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(X,X_stl);
Interface::matrix_from_stl(C,C_stl);
_cost = 2.0*size*size*size/3.0 + size*size;
}
// invalidate copy ctor
Action_lu_decomp( const Action_lu_decomp & )
{
INFOS("illegal call to Action_lu_decomp Copy Ctor");
exit(1);
}
// Dtor
~Action_lu_decomp( void ){
MESSAGE("Action_lu_decomp Dtor");
// deallocation
Interface::free_matrix(X_ref,_size);
Interface::free_matrix(X,_size);
Interface::free_matrix(C,_size);
}
// action name
static inline std::string name( void )
{
return "lu_decomp_"+Interface::name();
}
double nb_op_base( void ){
return _cost;
}
inline void initialize( void ){
Interface::copy_matrix(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::lu_decomp(X,C,_size);
}
void check_result( void ){
// calculation check
Interface::matrix_to_stl(C,resu_stl);
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
//
// typename Interface::real_type error=
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
//
// if (error>1.e-6){
// INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
// }
}
private :
typename Interface::stl_matrix X_stl;
typename Interface::stl_matrix C_stl;
typename Interface::stl_matrix resu_stl;
typename Interface::gene_matrix X_ref;
typename Interface::gene_matrix X;
typename Interface::gene_matrix C;
int _size;
int _cost;
};
#endif

View File

@ -120,7 +120,7 @@ public :
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
// exit(1);
}
}

View File

@ -121,7 +121,7 @@ public :
if (error>1.e-5){
INFOS("WRONG CALCULATION...residual=" << error);
// exit(0);
exit(0);
}
}

View File

@ -1,9 +1,8 @@
ADD_CUSTOM_TARGET(copy_scripts)
SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh hessenberg.hh tridiagonalization.hh
order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh
matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
SET(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh
perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh )
FOREACH(script_file ${script_files})
ADD_CUSTOM_COMMAND(

View File

@ -1,4 +0,0 @@
set title "{/*1.5 A x A^T}"
set xlabel "matrix size" 0.000000,0.000000
set xrange [4:1024]

View File

@ -0,0 +1,12 @@
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024
axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000
axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024
trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024
lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024

View File

@ -1,4 +0,0 @@
set title "{/*1.5 A^T x A}"
set xlabel "matrix size" 0.000000,0.000000
set xrange [4:1024]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 matrix^T x vector}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [4:1024]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
set xlabel "vector size" 0.000000,0.000000
set xrange [5:1000000]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 Y += alpha * X}" 0.000000,0.000000
set xlabel "vector size" 0.000000,0.000000
set xrange [5:1000000]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -5,7 +5,9 @@ mkdir -p $1
EIGENDIR=`cat eigen_root_dir.txt`
webpagefilename=$1/index.html
#cp header.html $webpagefilename
meanstatsfilename=$1/mean.html
echo '' > $meanstatsfilename
echo '' > $webpagefilename
echo '<p><strong>Configuration</strong>' >> $webpagefilename
echo '<ul>'\
@ -16,17 +18,19 @@ echo '<ul>'\
'</ul>' \
'</p>' >> $webpagefilename
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
source mk_mean_script.sh axpby $1 11 2500 100000 250000 > $1/axpby.html
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
source mk_mean_script.sh hessenberg $1 11 100 300 1000 > $1/hessenberg.html
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 > $1/tridiagonalization.html
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2
source mk_mean_script.sh atv $1 11 50 300 1000 $2
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2
source mk_mean_script.sh aat $1 11 100 300 1000 $2
source mk_mean_script.sh ata $1 11 100 300 1000 $2
source mk_mean_script.sh trisolve $1 11 100 300 1000 $2
source mk_mean_script.sh cholesky $1 11 100 300 1000 $2
source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2
## compile the web page ##

View File

@ -1,3 +0,0 @@
set title "{/*1.5 Hessenberg decomposition}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 matrix matrix product}"
set xlabel "matrix size" 0.000000,0.000000
set xrange [4:1024]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 matrix vector product}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [4:1024]

View File

@ -6,12 +6,14 @@ MAXIC=$4
MINOC=$5
MAXOC=$6
meanstatsfilename=$2/mean.html
WORK_DIR=tmp
mkdir $WORK_DIR
DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`
echo ""
echo $"1..."
echo "$1..."
for FILE in $DATA_FILE
do
##echo hello world
@ -24,13 +26,15 @@ do
done
cd $WORK_DIR
../main $1 $3 $4 $5 $6 *
../mk_new_gnuplot.sh $1 $2
../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename
../mk_new_gnuplot.sh $1 $2 $7
rm -f *.gnuplot
cd ..
rm -R $WORK_DIR
echo '<br/>' >> $meanstatsfilename
webpagefilename=$2/index.html
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
echo '<hr/><a href="/btl/'$1'.pdf"><img src="/btl/'$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename

View File

@ -1,14 +1,30 @@
#! /bin/bash
#!/bin/bash
WHAT=$1
DIR=$2
cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot
cat ../${WHAT}.hh >> ${WHAT}.gnuplot
echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot
echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` "0.000000,0.000000" >> $WHAT.gnuplot
echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot
if [ $# > 3 ]; then
if [ $3 == "tiny" ]; then
echo "set xrange [2:16]" >> $WHAT.gnuplot
echo "set nologscale" >> $WHAT.gnuplot
fi
fi
DATA_FILE=`cat ../order_lib`
echo set term postscript color rounded enhanced >> $WHAT.gnuplot
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot
# echo set term svg color rounded enhanced >> $WHAT.gnuplot
# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot
echo plot \\ >> $WHAT.gnuplot
for FILE in $DATA_FILE
@ -34,3 +50,5 @@ rm $WHAT.gnuplot
ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
convert -density 120 -rotate 90 -resize 800 +dither -colors 48 -quality 0 ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.png
# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg

View File

@ -1,11 +0,0 @@
eigen2_SSE
eigen2
INTEL_MKL
ATLAS
STL
C
gmm
mtl4
ublas
blitz
F77

View File

@ -1,3 +0,0 @@
set title "{/*1.5 Tridiagonalization}" 0.000000,0.000000
set xlabel "matrix size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -1,3 +0,0 @@
set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
set xlabel "matrix-vector size" 0.000000,0.000000
set xrange [6:1000]

View File

@ -53,13 +53,15 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
std::vector<int> tab_sizes(nb_point);
// matrices and vector size calculations
size_lin_log(nb_point,size_min,size_max,tab_sizes);
std::vector<int> oldSizes;
std::vector<double> oldFlops;
bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
int oldi = oldSizes.size() - 1;
// loop on matrix size
Perf_Analyzer<Action> perf_action;
for (int i=nb_point-1;i>=0;i--)
{
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
@ -74,14 +76,28 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
#endif
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
std::cout << tab_mflops[i];
if (hasOldResults)
{
while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
--oldi;
if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
{
if (oldFlops[oldi]<tab_mflops[i])
std::cout << "\t > ";
else
std::cout << "\t < ";
std::cout << oldFlops[oldi];
}
--oldi;
}
std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
if (!BtlConfig::Instance.overwriteResults)
{
std::vector<int> oldSizes;
std::vector<double> oldFlops;
if (read_xy_file(filename, oldSizes, oldFlops, true))
if (hasOldResults)
{
// merge the two data
std::vector<int> newSizes;

View File

@ -27,8 +27,8 @@
#include "xy_file.hh"
#include "static/static_size_generator.hh"
#include "timers/portable_perf_analyzer.hh"
#include "timers/mixed_perf_analyzer.hh"
#include "timers/x86_perf_analyzer.hh"
// #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh"
using namespace std;
@ -50,7 +50,7 @@ BTL_DONT_INLINE void bench_static(void)
static_size_generator<max_size,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
dump_file_x_y(tab_sizes,tab_mflops,filename);
dump_xy_file(tab_sizes,tab_mflops,filename);
}
// default Perf Analyzer

View File

@ -27,12 +27,13 @@ using namespace std;
template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
struct static_size_generator{
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
{
tab_sizes.push_back(SIZE);
std::cout << tab_sizes.back() << " \t" << std::flush;
Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action;
tab_mflops.push_back(perf_action.eval_mflops(SIZE));
std::cout << tab_mflops.back() << " MFlops" << std::endl;
static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
};
};

View File

@ -50,6 +50,11 @@ void scopy_(const int *n, const float *x, const int *incx, float *y, const int *
void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
// LU row pivoting
void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
// LU full pivoting
void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
#ifdef HAS_LAPACK
#endif
}
@ -193,7 +198,8 @@ public :
}
static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
cblas_scopy(N*N, X, 1, C, 1);
int N2 = N*N;
scopy_(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
spotrf_(&uplo, &N, C, &N, &info);
@ -201,6 +207,16 @@ public :
#ifdef HAS_LAPACK
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
int N2 = N*N;
scopy_(&N2, X, &intone, C, &intone);
char uplo = 'L';
int info = 0;
int * ipiv = (int*)alloca(sizeof(int)*N);
int * jpiv = (int*)alloca(sizeof(int)*N);
sgetc2_(&N, C, &N, ipiv, jpiv, &info);
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
cblas_scopy(N*N, X, 1, C, 1);
int info = 0;

View File

@ -23,6 +23,7 @@
#include "basic_actions.hh"
#include "action_cholesky.hh"
#include "action_lu_decomp.hh"
#ifdef HAS_LAPACK
#include "action_hessenberg.hh"
@ -45,9 +46,10 @@ int main()
bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#ifdef HAS_LAPACK
bench<Action_lu_decomp<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_hessenberg<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_tridiagonalization<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
#endif

View File

@ -1,4 +1,4 @@
find_package(MKL)
find_package(Eigen2)
if (EIGEN2_FOUND)
@ -25,6 +25,11 @@ if (EIGEN2_FOUND)
if(BUILD_btl_eigen2_novec_adv)
set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec_adv)
if(BUILD_btl_eigen2_adv)
target_link_libraries(btl_eigen2_adv ${MKL_LIBRARIES})
endif(BUILD_btl_eigen2_adv)
ENDIF(NOT BTL_NOVEC)
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)

View File

@ -25,6 +25,8 @@
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
#include "action_cholesky.hh"
#include "action_trisolve.hh"
BTL_MAIN;
@ -35,6 +37,8 @@ int main()
bench_static<Action_matrix_matrix_product,eigen2_interface>();
bench_static<Action_matrix_vector_product,eigen2_interface>();
bench_static<Action_atv_product,eigen2_interface>();
bench_static<Action_cholesky,eigen2_interface>();
bench_static<Action_trisolve,eigen2_interface>();
return 0;
}

View File

@ -17,9 +17,10 @@
//
#ifndef EIGEN2_INTERFACE_HH
#define EIGEN2_INTERFACE_HH
#include <cblas.h>
#include <Eigen/Core>
#include <Eigen/Cholesky>
#include <Eigen/LU>
#include <Eigen/QR>
#include <vector>
#include "btl.hh"
@ -138,16 +139,25 @@ public :
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
C = X.cholesky().matrixL();
// C = X;
// Cholesky<gene_matrix>::computeInPlace(C);
// Cholesky<gene_matrix>::computeInPlaceBlock(C);
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
C = X.lu().matrixLU();
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
C = Tridiagonalization<gene_matrix>(X).packedMatrix();
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
}
};
#endif

View File

@ -21,6 +21,7 @@
#include "action_trisolve.hh"
#include "action_cholesky.hh"
#include "action_hessenberg.hh"
#include "action_lu_decomp.hh"
BTL_MAIN;
@ -28,6 +29,7 @@ int main()
{
bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_lu_decomp<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_hessenberg<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_tridiagonalization<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -123,14 +123,20 @@ public :
gmm::lower_tri_solve(L, X, false);
}
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
gmm::copy(X,C);
gmm::Hessenberg_reduction(C,X,false);
static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
gmm::copy(X,R);
std::vector<int> ipvt(N);
gmm::lu_factor(R, ipvt);
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
gmm::copy(X,C);
gmm::Householder_tridiagonalization(C,X,false);
static inline void hessenberg(const gene_matrix & X, gene_matrix & R, int N){
gmm::copy(X,R);
gmm::Hessenberg_reduction(R,X,false);
}
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & R, int N){
gmm::copy(X,R);
gmm::Householder_tridiagonalization(R,X,false);
}
};

View File

@ -20,6 +20,7 @@
#include "bench.hh"
#include "basic_actions.hh"
#include "action_hessenberg.hh"
#include "action_lu_decomp.hh"
BTL_MAIN;
@ -39,6 +40,8 @@ int main()
bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
bench<Action_lu_decomp<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_hessenberg<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_tridiagonalization<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -714,17 +714,36 @@ public :
{
Packet pcoef = ei_pset1(coef);
#ifdef PEELING
int ANP = (AN/(8*PacketSize))*8*PacketSize;
for (int j = 0;j<ANP;j+=PacketSize*8)
const int peelSize = 3;
int ANP = (AN/(peelSize*PacketSize))*peelSize*PacketSize;
float* X1 = X + PacketSize;
float* Y1 = Y + PacketSize;
float* X2 = X + 2*PacketSize;
float* Y2 = Y + 2*PacketSize;
Packet x0,x1,x2,y0,y1,y2;
for (int j = 0;j<ANP;j+=PacketSize*peelSize)
{
ei_pstore(&Y[j ], ei_padd(ei_pload(&Y[j ]), ei_pmul(pcoef,ei_pload(&X[j ]))));
ei_pstore(&Y[j+ PacketSize], ei_padd(ei_pload(&Y[j+ PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+ PacketSize]))));
ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize]))));
ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize]))));
ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize]))));
ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize]))));
ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize]))));
ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize]))));
x0 = ei_pload(X+j);
x1 = ei_pload(X1+j);
x2 = ei_pload(X2+j);
y0 = ei_pload(Y+j);
y1 = ei_pload(Y1+j);
y2 = ei_pload(Y2+j);
y0 = ei_pmadd(pcoef, x0, y0);
y1 = ei_pmadd(pcoef, x1, y1);
y2 = ei_pmadd(pcoef, x2, y2);
ei_pstore(Y+j, y0);
ei_pstore(Y1+j, y1);
ei_pstore(Y2+j, y2);
// ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize]))));
// ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize]))));
// ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize]))));
// ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize]))));
// ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize]))));
// ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize]))));
}
for (int j = ANP;j<AN;j+=PacketSize)
ei_pstore(&Y[j], ei_padd(ei_pload(&Y[j]), ei_pmul(pcoef,ei_pload(&X[j]))));

View File

@ -27,7 +27,7 @@
#include "action_ata_product.hh"
#include "action_aat_product.hh"
//#include "action_lu_solve.hh"
#include "timers/mixed_perf_analyzer.hh"
// #include "timers/mixed_perf_analyzer.hh"
BTL_MAIN;

View File

@ -20,6 +20,7 @@
#include "bench.hh"
#include "basic_actions.hh"
#include "action_cholesky.hh"
// #include "action_lu_decomp.hh"
BTL_MAIN;
@ -37,6 +38,7 @@ int main()
bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_lu_decomp<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@ -120,6 +120,12 @@ public :
recursive_cholesky(C);
}
// static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
// R = X;
// std::vector<int> ipvt(N);
// lu_factor(R, ipvt);
// }
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
X = lower_trisolve(L, B);
}

View File

@ -41,7 +41,7 @@ public :
typedef Vector<real,SIZE> gene_vector;
typedef Matrix<real,SIZE,SIZE> gene_matrix;
static inline std::string name() { return "tvmet"; }
static inline std::string name() { return "tiny_tvmet"; }
static void free_matrix(gene_matrix & A, int N){}