mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-31 19:00:35 +08:00
Add a LU decomposition action in BTL and various cleaning in BTL. For instance
all per plot settings have been moved to a single file, go_mean now takes an optional second argument "tiny" to generate plots for tiny matrices, and output of comparison information wrt to previous benchs (if any).
This commit is contained in:
parent
c2f8ecf466
commit
a7a05382d1
@ -9,7 +9,8 @@ OPTION(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)
|
||||
|
||||
SET(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
|
||||
IF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER})
|
||||
IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
|
||||
SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG")
|
||||
SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
|
||||
IF(NOT BTL_NOVEC)
|
||||
@ -18,7 +19,12 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
ELSE(NOT BTL_NOVEC)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
|
||||
ENDIF(NOT BTL_NOVEC)
|
||||
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
|
||||
|
||||
if(IS_ICPC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast")
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast")
|
||||
endif(IS_ICPC)
|
||||
|
||||
include_directories(
|
||||
${PROJECT_SOURCE_DIR}/actions
|
||||
@ -76,3 +82,4 @@ add_subdirectory(libs/STL_algo)
|
||||
|
||||
add_subdirectory(data)
|
||||
|
||||
|
||||
|
@ -102,7 +102,7 @@ public :
|
||||
|
||||
if (error>1.e-6){
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,7 +85,9 @@ public :
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
asm("#mybegin axpby");
|
||||
Interface::axpby(_alpha,X,_beta,Y,_size);
|
||||
asm("#myend axpby");
|
||||
}
|
||||
|
||||
void check_result( void ){
|
||||
|
@ -96,7 +96,9 @@ public :
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::axpy(_coef,X,Y,_size);
|
||||
asm("#mybegin axpy");
|
||||
Interface::axpy(_coef,X,Y,_size);
|
||||
asm("#myend axpy");
|
||||
}
|
||||
|
||||
void check_result( void ){
|
||||
|
124
bench/btl/actions/action_lu_decomp.hh
Normal file
124
bench/btl/actions/action_lu_decomp.hh
Normal file
@ -0,0 +1,124 @@
|
||||
//=====================================================
|
||||
// File : action_lu_decomp.hh
|
||||
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||
//=====================================================
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
//
|
||||
#ifndef ACTION_LU_DECOMP
|
||||
#define ACTION_LU_DECOMP
|
||||
#include "utilities.h"
|
||||
#include "STL_interface.hh"
|
||||
#include <string>
|
||||
#include "init/init_function.hh"
|
||||
#include "init/init_vector.hh"
|
||||
#include "init/init_matrix.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template<class Interface>
|
||||
class Action_lu_decomp {
|
||||
|
||||
public :
|
||||
|
||||
// Ctor
|
||||
|
||||
Action_lu_decomp( int size ):_size(size)
|
||||
{
|
||||
MESSAGE("Action_lu_decomp Ctor");
|
||||
|
||||
// STL vector initialization
|
||||
init_matrix<pseudo_random>(X_stl,_size);
|
||||
|
||||
init_matrix<null_function>(C_stl,_size);
|
||||
init_matrix<null_function>(resu_stl,_size);
|
||||
|
||||
// generic matrix and vector initialization
|
||||
Interface::matrix_from_stl(X_ref,X_stl);
|
||||
Interface::matrix_from_stl(X,X_stl);
|
||||
Interface::matrix_from_stl(C,C_stl);
|
||||
|
||||
_cost = 2.0*size*size*size/3.0 + size*size;
|
||||
}
|
||||
|
||||
// invalidate copy ctor
|
||||
|
||||
Action_lu_decomp( const Action_lu_decomp & )
|
||||
{
|
||||
INFOS("illegal call to Action_lu_decomp Copy Ctor");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Dtor
|
||||
|
||||
~Action_lu_decomp( void ){
|
||||
|
||||
MESSAGE("Action_lu_decomp Dtor");
|
||||
|
||||
// deallocation
|
||||
Interface::free_matrix(X_ref,_size);
|
||||
Interface::free_matrix(X,_size);
|
||||
Interface::free_matrix(C,_size);
|
||||
}
|
||||
|
||||
// action name
|
||||
|
||||
static inline std::string name( void )
|
||||
{
|
||||
return "lu_decomp_"+Interface::name();
|
||||
}
|
||||
|
||||
double nb_op_base( void ){
|
||||
return _cost;
|
||||
}
|
||||
|
||||
inline void initialize( void ){
|
||||
Interface::copy_matrix(X_ref,X,_size);
|
||||
}
|
||||
|
||||
inline void calculate( void ) {
|
||||
Interface::lu_decomp(X,C,_size);
|
||||
}
|
||||
|
||||
void check_result( void ){
|
||||
// calculation check
|
||||
Interface::matrix_to_stl(C,resu_stl);
|
||||
|
||||
// STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
|
||||
//
|
||||
// typename Interface::real_type error=
|
||||
// STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
|
||||
//
|
||||
// if (error>1.e-6){
|
||||
// INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
private :
|
||||
|
||||
typename Interface::stl_matrix X_stl;
|
||||
typename Interface::stl_matrix C_stl;
|
||||
typename Interface::stl_matrix resu_stl;
|
||||
|
||||
typename Interface::gene_matrix X_ref;
|
||||
typename Interface::gene_matrix X;
|
||||
typename Interface::gene_matrix C;
|
||||
|
||||
int _size;
|
||||
int _cost;
|
||||
};
|
||||
|
||||
#endif
|
@ -120,7 +120,7 @@ public :
|
||||
|
||||
if (error>1.e-6){
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
exit(1);
|
||||
// exit(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -121,7 +121,7 @@ public :
|
||||
|
||||
if (error>1.e-5){
|
||||
INFOS("WRONG CALCULATION...residual=" << error);
|
||||
// exit(0);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
|
||||
ADD_CUSTOM_TARGET(copy_scripts)
|
||||
|
||||
SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh hessenberg.hh tridiagonalization.hh
|
||||
order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh
|
||||
matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
|
||||
SET(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh
|
||||
perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh )
|
||||
|
||||
FOREACH(script_file ${script_files})
|
||||
ADD_CUSTOM_COMMAND(
|
||||
|
@ -1,4 +0,0 @@
|
||||
set title "{/*1.5 A x A^T}"
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [4:1024]
|
||||
|
12
bench/btl/data/action_settings.txt
Normal file
12
bench/btl/data/action_settings.txt
Normal file
@ -0,0 +1,12 @@
|
||||
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:1024
|
||||
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:1024
|
||||
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:1024
|
||||
axpby ; "{/*1.5 Y = alpha * X + beta * Y}" ; "vector size" ; 5:1000000
|
||||
axpy ; "{/*1.5 Y += alpha * X}" ; "vector size" ; 5:1000000
|
||||
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:1024
|
||||
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:1024
|
||||
trisolve ; "{/*1.5 triangular solver (X = inv(L) * X)}" ; "size" ; 4:1024
|
||||
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:1024
|
||||
lu_decomp ; "{/*1.5 LU decomposition}" ; "matrix size" ; 4:1024
|
||||
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:1024
|
||||
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:1024
|
@ -1,4 +0,0 @@
|
||||
set title "{/*1.5 A^T x A}"
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [4:1024]
|
||||
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 matrix^T x vector}" 0.000000,0.000000
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [4:1024]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
|
||||
set xlabel "vector size" 0.000000,0.000000
|
||||
set xrange [5:1000000]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 Y += alpha * X}" 0.000000,0.000000
|
||||
set xlabel "vector size" 0.000000,0.000000
|
||||
set xrange [5:1000000]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [6:1000]
|
@ -5,7 +5,9 @@ mkdir -p $1
|
||||
EIGENDIR=`cat eigen_root_dir.txt`
|
||||
|
||||
webpagefilename=$1/index.html
|
||||
#cp header.html $webpagefilename
|
||||
meanstatsfilename=$1/mean.html
|
||||
|
||||
echo '' > $meanstatsfilename
|
||||
echo '' > $webpagefilename
|
||||
echo '<p><strong>Configuration</strong>' >> $webpagefilename
|
||||
echo '<ul>'\
|
||||
@ -16,17 +18,19 @@ echo '<ul>'\
|
||||
'</ul>' \
|
||||
'</p>' >> $webpagefilename
|
||||
|
||||
source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
|
||||
source mk_mean_script.sh axpby $1 11 2500 100000 250000 > $1/axpby.html
|
||||
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
|
||||
source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
|
||||
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
|
||||
source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
|
||||
source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
|
||||
source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
|
||||
source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
|
||||
source mk_mean_script.sh hessenberg $1 11 100 300 1000 > $1/hessenberg.html
|
||||
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 > $1/tridiagonalization.html
|
||||
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $2
|
||||
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $2
|
||||
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $2
|
||||
source mk_mean_script.sh atv $1 11 50 300 1000 $2
|
||||
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh aat $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh ata $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh trisolve $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh cholesky $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh lu_decomp $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $2
|
||||
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $2
|
||||
|
||||
|
||||
## compile the web page ##
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 Hessenberg decomposition}" 0.000000,0.000000
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [6:1000]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 matrix matrix product}"
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [4:1024]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 matrix vector product}" 0.000000,0.000000
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [4:1024]
|
@ -6,12 +6,14 @@ MAXIC=$4
|
||||
MINOC=$5
|
||||
MAXOC=$6
|
||||
|
||||
meanstatsfilename=$2/mean.html
|
||||
|
||||
WORK_DIR=tmp
|
||||
mkdir $WORK_DIR
|
||||
|
||||
DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`
|
||||
echo ""
|
||||
echo $"1..."
|
||||
echo "$1..."
|
||||
for FILE in $DATA_FILE
|
||||
do
|
||||
##echo hello world
|
||||
@ -24,13 +26,15 @@ do
|
||||
done
|
||||
|
||||
cd $WORK_DIR
|
||||
../main $1 $3 $4 $5 $6 *
|
||||
../mk_new_gnuplot.sh $1 $2
|
||||
../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename
|
||||
../mk_new_gnuplot.sh $1 $2 $7
|
||||
rm -f *.gnuplot
|
||||
cd ..
|
||||
|
||||
rm -R $WORK_DIR
|
||||
|
||||
echo '<br/>' >> $meanstatsfilename
|
||||
|
||||
webpagefilename=$2/index.html
|
||||
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
|
||||
echo '<hr/><a href="/btl/'$1'.pdf"><img src="/btl/'$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename
|
||||
|
@ -1,14 +1,30 @@
|
||||
#! /bin/bash
|
||||
#!/bin/bash
|
||||
WHAT=$1
|
||||
DIR=$2
|
||||
|
||||
cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot
|
||||
cat ../${WHAT}.hh >> ${WHAT}.gnuplot
|
||||
|
||||
echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot
|
||||
echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` "0.000000,0.000000" >> $WHAT.gnuplot
|
||||
echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot
|
||||
|
||||
if [ $# > 3 ]; then
|
||||
if [ $3 == "tiny" ]; then
|
||||
echo "set xrange [2:16]" >> $WHAT.gnuplot
|
||||
echo "set nologscale" >> $WHAT.gnuplot
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
|
||||
DATA_FILE=`cat ../order_lib`
|
||||
echo set term postscript color rounded enhanced >> $WHAT.gnuplot
|
||||
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot
|
||||
|
||||
# echo set term svg color rounded enhanced >> $WHAT.gnuplot
|
||||
# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
|
||||
# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot
|
||||
|
||||
echo plot \\ >> $WHAT.gnuplot
|
||||
|
||||
for FILE in $DATA_FILE
|
||||
@ -34,3 +50,5 @@ rm $WHAT.gnuplot
|
||||
|
||||
ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
|
||||
convert -density 120 -rotate 90 -resize 800 +dither -colors 48 -quality 0 ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.png
|
||||
|
||||
# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg
|
||||
|
@ -1,11 +0,0 @@
|
||||
eigen2_SSE
|
||||
eigen2
|
||||
INTEL_MKL
|
||||
ATLAS
|
||||
STL
|
||||
C
|
||||
gmm
|
||||
mtl4
|
||||
ublas
|
||||
blitz
|
||||
F77
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 Tridiagonalization}" 0.000000,0.000000
|
||||
set xlabel "matrix size" 0.000000,0.000000
|
||||
set xrange [6:1000]
|
@ -1,3 +0,0 @@
|
||||
set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
|
||||
set xlabel "matrix-vector size" 0.000000,0.000000
|
||||
set xrange [6:1000]
|
@ -53,13 +53,15 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
std::vector<int> tab_sizes(nb_point);
|
||||
|
||||
// matrices and vector size calculations
|
||||
|
||||
size_lin_log(nb_point,size_min,size_max,tab_sizes);
|
||||
|
||||
std::vector<int> oldSizes;
|
||||
std::vector<double> oldFlops;
|
||||
bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
|
||||
int oldi = oldSizes.size() - 1;
|
||||
|
||||
// loop on matrix size
|
||||
|
||||
Perf_Analyzer<Action> perf_action;
|
||||
|
||||
for (int i=nb_point-1;i>=0;i--)
|
||||
{
|
||||
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
|
||||
@ -74,14 +76,28 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
|
||||
#endif
|
||||
|
||||
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
|
||||
std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
|
||||
std::cout << tab_mflops[i];
|
||||
|
||||
if (hasOldResults)
|
||||
{
|
||||
while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
|
||||
--oldi;
|
||||
if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
|
||||
{
|
||||
if (oldFlops[oldi]<tab_mflops[i])
|
||||
std::cout << "\t > ";
|
||||
else
|
||||
std::cout << "\t < ";
|
||||
std::cout << oldFlops[oldi];
|
||||
}
|
||||
--oldi;
|
||||
}
|
||||
std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
|
||||
}
|
||||
|
||||
if (!BtlConfig::Instance.overwriteResults)
|
||||
{
|
||||
std::vector<int> oldSizes;
|
||||
std::vector<double> oldFlops;
|
||||
if (read_xy_file(filename, oldSizes, oldFlops, true))
|
||||
if (hasOldResults)
|
||||
{
|
||||
// merge the two data
|
||||
std::vector<int> newSizes;
|
||||
|
@ -27,8 +27,8 @@
|
||||
#include "xy_file.hh"
|
||||
#include "static/static_size_generator.hh"
|
||||
#include "timers/portable_perf_analyzer.hh"
|
||||
#include "timers/mixed_perf_analyzer.hh"
|
||||
#include "timers/x86_perf_analyzer.hh"
|
||||
// #include "timers/mixed_perf_analyzer.hh"
|
||||
// #include "timers/x86_perf_analyzer.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -50,7 +50,7 @@ BTL_DONT_INLINE void bench_static(void)
|
||||
|
||||
static_size_generator<max_size,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
|
||||
|
||||
dump_file_x_y(tab_sizes,tab_mflops,filename);
|
||||
dump_xy_file(tab_sizes,tab_mflops,filename);
|
||||
}
|
||||
|
||||
// default Perf Analyzer
|
||||
|
@ -27,12 +27,13 @@ using namespace std;
|
||||
|
||||
template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
|
||||
struct static_size_generator{
|
||||
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
|
||||
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
|
||||
{
|
||||
tab_sizes.push_back(SIZE);
|
||||
|
||||
std::cout << tab_sizes.back() << " \t" << std::flush;
|
||||
Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action;
|
||||
tab_mflops.push_back(perf_action.eval_mflops(SIZE));
|
||||
std::cout << tab_mflops.back() << " MFlops" << std::endl;
|
||||
static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
|
||||
};
|
||||
};
|
||||
|
@ -50,6 +50,11 @@ void scopy_(const int *n, const float *x, const int *incx, float *y, const int *
|
||||
void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
|
||||
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
|
||||
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
|
||||
|
||||
// LU row pivoting
|
||||
void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
|
||||
// LU full pivoting
|
||||
void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
|
||||
#ifdef HAS_LAPACK
|
||||
#endif
|
||||
}
|
||||
@ -193,7 +198,8 @@ public :
|
||||
}
|
||||
|
||||
static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
|
||||
cblas_scopy(N*N, X, 1, C, 1);
|
||||
int N2 = N*N;
|
||||
scopy_(&N2, X, &intone, C, &intone);
|
||||
char uplo = 'L';
|
||||
int info = 0;
|
||||
spotrf_(&uplo, &N, C, &N, &info);
|
||||
@ -201,6 +207,16 @@ public :
|
||||
|
||||
#ifdef HAS_LAPACK
|
||||
|
||||
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
|
||||
int N2 = N*N;
|
||||
scopy_(&N2, X, &intone, C, &intone);
|
||||
char uplo = 'L';
|
||||
int info = 0;
|
||||
int * ipiv = (int*)alloca(sizeof(int)*N);
|
||||
int * jpiv = (int*)alloca(sizeof(int)*N);
|
||||
sgetc2_(&N, C, &N, ipiv, jpiv, &info);
|
||||
}
|
||||
|
||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
||||
cblas_scopy(N*N, X, 1, C, 1);
|
||||
int info = 0;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "basic_actions.hh"
|
||||
|
||||
#include "action_cholesky.hh"
|
||||
#include "action_lu_decomp.hh"
|
||||
|
||||
#ifdef HAS_LAPACK
|
||||
#include "action_hessenberg.hh"
|
||||
@ -45,9 +46,10 @@ int main()
|
||||
|
||||
bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
// bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
#ifdef HAS_LAPACK
|
||||
bench<Action_lu_decomp<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_hessenberg<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_tridiagonalization<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
#endif
|
||||
|
@ -1,4 +1,4 @@
|
||||
|
||||
find_package(MKL)
|
||||
find_package(Eigen2)
|
||||
if (EIGEN2_FOUND)
|
||||
|
||||
@ -25,6 +25,11 @@ if (EIGEN2_FOUND)
|
||||
if(BUILD_btl_eigen2_novec_adv)
|
||||
set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
|
||||
endif(BUILD_btl_eigen2_novec_adv)
|
||||
|
||||
if(BUILD_btl_eigen2_adv)
|
||||
target_link_libraries(btl_eigen2_adv ${MKL_LIBRARIES})
|
||||
endif(BUILD_btl_eigen2_adv)
|
||||
|
||||
ENDIF(NOT BTL_NOVEC)
|
||||
|
||||
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
#include "action_atv_product.hh"
|
||||
#include "action_cholesky.hh"
|
||||
#include "action_trisolve.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
@ -35,6 +37,8 @@ int main()
|
||||
bench_static<Action_matrix_matrix_product,eigen2_interface>();
|
||||
bench_static<Action_matrix_vector_product,eigen2_interface>();
|
||||
bench_static<Action_atv_product,eigen2_interface>();
|
||||
bench_static<Action_cholesky,eigen2_interface>();
|
||||
bench_static<Action_trisolve,eigen2_interface>();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -17,9 +17,10 @@
|
||||
//
|
||||
#ifndef EIGEN2_INTERFACE_HH
|
||||
#define EIGEN2_INTERFACE_HH
|
||||
|
||||
#include <cblas.h>
|
||||
#include <Eigen/Core>
|
||||
#include <Eigen/Cholesky>
|
||||
#include <Eigen/LU>
|
||||
#include <Eigen/QR>
|
||||
#include <vector>
|
||||
#include "btl.hh"
|
||||
@ -138,16 +139,25 @@ public :
|
||||
|
||||
static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
|
||||
C = X.cholesky().matrixL();
|
||||
// C = X;
|
||||
// Cholesky<gene_matrix>::computeInPlace(C);
|
||||
// Cholesky<gene_matrix>::computeInPlaceBlock(C);
|
||||
}
|
||||
|
||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
||||
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
|
||||
static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
|
||||
C = X.lu().matrixLU();
|
||||
}
|
||||
|
||||
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
|
||||
C = Tridiagonalization<gene_matrix>(X).packedMatrix();
|
||||
}
|
||||
|
||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
||||
C = HessenbergDecomposition<gene_matrix>(X).packedMatrix();
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "action_trisolve.hh"
|
||||
#include "action_cholesky.hh"
|
||||
#include "action_hessenberg.hh"
|
||||
#include "action_lu_decomp.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
@ -28,6 +29,7 @@ int main()
|
||||
{
|
||||
bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_lu_decomp<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_hessenberg<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_tridiagonalization<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
@ -123,14 +123,20 @@ public :
|
||||
gmm::lower_tri_solve(L, X, false);
|
||||
}
|
||||
|
||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
|
||||
gmm::copy(X,C);
|
||||
gmm::Hessenberg_reduction(C,X,false);
|
||||
static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
|
||||
gmm::copy(X,R);
|
||||
std::vector<int> ipvt(N);
|
||||
gmm::lu_factor(R, ipvt);
|
||||
}
|
||||
|
||||
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
|
||||
gmm::copy(X,C);
|
||||
gmm::Householder_tridiagonalization(C,X,false);
|
||||
static inline void hessenberg(const gene_matrix & X, gene_matrix & R, int N){
|
||||
gmm::copy(X,R);
|
||||
gmm::Hessenberg_reduction(R,X,false);
|
||||
}
|
||||
|
||||
static inline void tridiagonalization(const gene_matrix & X, gene_matrix & R, int N){
|
||||
gmm::copy(X,R);
|
||||
gmm::Householder_tridiagonalization(R,X,false);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "bench.hh"
|
||||
#include "basic_actions.hh"
|
||||
#include "action_hessenberg.hh"
|
||||
#include "action_lu_decomp.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
@ -39,6 +40,8 @@ int main()
|
||||
bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
|
||||
|
||||
bench<Action_lu_decomp<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
bench<Action_hessenberg<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_tridiagonalization<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
|
@ -714,17 +714,36 @@ public :
|
||||
{
|
||||
Packet pcoef = ei_pset1(coef);
|
||||
#ifdef PEELING
|
||||
int ANP = (AN/(8*PacketSize))*8*PacketSize;
|
||||
for (int j = 0;j<ANP;j+=PacketSize*8)
|
||||
const int peelSize = 3;
|
||||
int ANP = (AN/(peelSize*PacketSize))*peelSize*PacketSize;
|
||||
float* X1 = X + PacketSize;
|
||||
float* Y1 = Y + PacketSize;
|
||||
float* X2 = X + 2*PacketSize;
|
||||
float* Y2 = Y + 2*PacketSize;
|
||||
Packet x0,x1,x2,y0,y1,y2;
|
||||
for (int j = 0;j<ANP;j+=PacketSize*peelSize)
|
||||
{
|
||||
ei_pstore(&Y[j ], ei_padd(ei_pload(&Y[j ]), ei_pmul(pcoef,ei_pload(&X[j ]))));
|
||||
ei_pstore(&Y[j+ PacketSize], ei_padd(ei_pload(&Y[j+ PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+ PacketSize]))));
|
||||
ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize]))));
|
||||
ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize]))));
|
||||
ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize]))));
|
||||
ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize]))));
|
||||
ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize]))));
|
||||
ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize]))));
|
||||
x0 = ei_pload(X+j);
|
||||
x1 = ei_pload(X1+j);
|
||||
x2 = ei_pload(X2+j);
|
||||
|
||||
y0 = ei_pload(Y+j);
|
||||
y1 = ei_pload(Y1+j);
|
||||
y2 = ei_pload(Y2+j);
|
||||
|
||||
y0 = ei_pmadd(pcoef, x0, y0);
|
||||
y1 = ei_pmadd(pcoef, x1, y1);
|
||||
y2 = ei_pmadd(pcoef, x2, y2);
|
||||
|
||||
ei_pstore(Y+j, y0);
|
||||
ei_pstore(Y1+j, y1);
|
||||
ei_pstore(Y2+j, y2);
|
||||
// ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize]))));
|
||||
// ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize]))));
|
||||
// ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize]))));
|
||||
// ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize]))));
|
||||
// ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize]))));
|
||||
// ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize]))));
|
||||
}
|
||||
for (int j = ANP;j<AN;j+=PacketSize)
|
||||
ei_pstore(&Y[j], ei_padd(ei_pload(&Y[j]), ei_pmul(pcoef,ei_pload(&X[j]))));
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include "action_ata_product.hh"
|
||||
#include "action_aat_product.hh"
|
||||
//#include "action_lu_solve.hh"
|
||||
#include "timers/mixed_perf_analyzer.hh"
|
||||
// #include "timers/mixed_perf_analyzer.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "bench.hh"
|
||||
#include "basic_actions.hh"
|
||||
#include "action_cholesky.hh"
|
||||
// #include "action_lu_decomp.hh"
|
||||
|
||||
BTL_MAIN;
|
||||
|
||||
@ -37,6 +38,7 @@ int main()
|
||||
|
||||
bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
// bench<Action_lu_decomp<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -120,6 +120,12 @@ public :
|
||||
recursive_cholesky(C);
|
||||
}
|
||||
|
||||
// static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
|
||||
// R = X;
|
||||
// std::vector<int> ipvt(N);
|
||||
// lu_factor(R, ipvt);
|
||||
// }
|
||||
|
||||
static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
|
||||
X = lower_trisolve(L, B);
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ public :
|
||||
typedef Vector<real,SIZE> gene_vector;
|
||||
typedef Matrix<real,SIZE,SIZE> gene_matrix;
|
||||
|
||||
static inline std::string name() { return "tvmet"; }
|
||||
static inline std::string name() { return "tiny_tvmet"; }
|
||||
|
||||
static void free_matrix(gene_matrix & A, int N){}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user